{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "05c21278",
   "metadata": {},
   "source": [
    "# 导入数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "abd16805",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "5cbd35ee",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>School</th>\n",
       "      <th>Grade</th>\n",
       "      <th>Name</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Height</th>\n",
       "      <th>Weight</th>\n",
       "      <th>Transfer</th>\n",
       "      <th>Test_Number</th>\n",
       "      <th>Test_Date</th>\n",
       "      <th>Time_Record</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Shanghai Jiao Tong University</td>\n",
       "      <td>Freshman</td>\n",
       "      <td>Gaopeng Yang</td>\n",
       "      <td>Female</td>\n",
       "      <td>158.9</td>\n",
       "      <td>46.0</td>\n",
       "      <td>N</td>\n",
       "      <td>1</td>\n",
       "      <td>2019/10/5</td>\n",
       "      <td>0:04:34</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Peking University</td>\n",
       "      <td>Freshman</td>\n",
       "      <td>Changqiang You</td>\n",
       "      <td>Male</td>\n",
       "      <td>166.5</td>\n",
       "      <td>70.0</td>\n",
       "      <td>N</td>\n",
       "      <td>1</td>\n",
       "      <td>2019/9/4</td>\n",
       "      <td>0:04:20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Shanghai Jiao Tong University</td>\n",
       "      <td>Senior</td>\n",
       "      <td>Mei Sun</td>\n",
       "      <td>Male</td>\n",
       "      <td>188.9</td>\n",
       "      <td>89.0</td>\n",
       "      <td>N</td>\n",
       "      <td>2</td>\n",
       "      <td>2019/9/12</td>\n",
       "      <td>0:05:22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Fudan University</td>\n",
       "      <td>Sophomore</td>\n",
       "      <td>Xiaojuan Sun</td>\n",
       "      <td>Female</td>\n",
       "      <td>NaN</td>\n",
       "      <td>41.0</td>\n",
       "      <td>N</td>\n",
       "      <td>2</td>\n",
       "      <td>2020/1/3</td>\n",
       "      <td>0:04:08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Fudan University</td>\n",
       "      <td>Sophomore</td>\n",
       "      <td>Gaojuan You</td>\n",
       "      <td>Male</td>\n",
       "      <td>174.0</td>\n",
       "      <td>74.0</td>\n",
       "      <td>N</td>\n",
       "      <td>2</td>\n",
       "      <td>2019/11/6</td>\n",
       "      <td>0:05:22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>195</th>\n",
       "      <td>Fudan University</td>\n",
       "      <td>Junior</td>\n",
       "      <td>Xiaojuan Sun</td>\n",
       "      <td>Female</td>\n",
       "      <td>153.9</td>\n",
       "      <td>46.0</td>\n",
       "      <td>N</td>\n",
       "      <td>2</td>\n",
       "      <td>2019/10/17</td>\n",
       "      <td>0:04:31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>196</th>\n",
       "      <td>Tsinghua University</td>\n",
       "      <td>Senior</td>\n",
       "      <td>Li Zhao</td>\n",
       "      <td>Female</td>\n",
       "      <td>160.9</td>\n",
       "      <td>50.0</td>\n",
       "      <td>N</td>\n",
       "      <td>3</td>\n",
       "      <td>2019/9/22</td>\n",
       "      <td>0:04:03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>197</th>\n",
       "      <td>Shanghai Jiao Tong University</td>\n",
       "      <td>Senior</td>\n",
       "      <td>Chengqiang Chu</td>\n",
       "      <td>Female</td>\n",
       "      <td>153.9</td>\n",
       "      <td>45.0</td>\n",
       "      <td>N</td>\n",
       "      <td>1</td>\n",
       "      <td>2020/1/5</td>\n",
       "      <td>0:04:48</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>198</th>\n",
       "      <td>Shanghai Jiao Tong University</td>\n",
       "      <td>Senior</td>\n",
       "      <td>Chengmei Shen</td>\n",
       "      <td>Male</td>\n",
       "      <td>175.3</td>\n",
       "      <td>71.0</td>\n",
       "      <td>N</td>\n",
       "      <td>2</td>\n",
       "      <td>2020/1/7</td>\n",
       "      <td>0:04:58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199</th>\n",
       "      <td>Tsinghua University</td>\n",
       "      <td>Sophomore</td>\n",
       "      <td>Chunpeng Lv</td>\n",
       "      <td>Male</td>\n",
       "      <td>155.7</td>\n",
       "      <td>51.0</td>\n",
       "      <td>N</td>\n",
       "      <td>1</td>\n",
       "      <td>2019/11/6</td>\n",
       "      <td>0:05:05</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>200 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                            School      Grade            Name  Gender  Height  \\\n",
       "0    Shanghai Jiao Tong University   Freshman    Gaopeng Yang  Female   158.9   \n",
       "1                Peking University   Freshman  Changqiang You    Male   166.5   \n",
       "2    Shanghai Jiao Tong University     Senior         Mei Sun    Male   188.9   \n",
       "3                 Fudan University  Sophomore    Xiaojuan Sun  Female     NaN   \n",
       "4                 Fudan University  Sophomore     Gaojuan You    Male   174.0   \n",
       "..                             ...        ...             ...     ...     ...   \n",
       "195               Fudan University     Junior    Xiaojuan Sun  Female   153.9   \n",
       "196            Tsinghua University     Senior         Li Zhao  Female   160.9   \n",
       "197  Shanghai Jiao Tong University     Senior  Chengqiang Chu  Female   153.9   \n",
       "198  Shanghai Jiao Tong University     Senior   Chengmei Shen    Male   175.3   \n",
       "199            Tsinghua University  Sophomore     Chunpeng Lv    Male   155.7   \n",
       "\n",
       "     Weight Transfer  Test_Number   Test_Date Time_Record  \n",
       "0      46.0        N            1   2019/10/5     0:04:34  \n",
       "1      70.0        N            1    2019/9/4     0:04:20  \n",
       "2      89.0        N            2   2019/9/12     0:05:22  \n",
       "3      41.0        N            2    2020/1/3     0:04:08  \n",
       "4      74.0        N            2   2019/11/6     0:05:22  \n",
       "..      ...      ...          ...         ...         ...  \n",
       "195    46.0        N            2  2019/10/17     0:04:31  \n",
       "196    50.0        N            3   2019/9/22     0:04:03  \n",
       "197    45.0        N            1    2020/1/5     0:04:48  \n",
       "198    71.0        N            2    2020/1/7     0:04:58  \n",
       "199    51.0        N            1   2019/11/6     0:05:05  \n",
       "\n",
       "[200 rows x 10 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv(\"C:/Users/pc/3.22/data_analysis-master/week02/data/learn_pandas.csv\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "f518d129",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>School</th>\n",
       "      <th>Grade</th>\n",
       "      <th>Name</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Height</th>\n",
       "      <th>Weight</th>\n",
       "      <th>Transfer</th>\n",
       "      <th>Test_Number</th>\n",
       "      <th>Test_Date</th>\n",
       "      <th>Time_Record</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Shanghai Jiao Tong University</td>\n",
       "      <td>Freshman</td>\n",
       "      <td>Gaopeng Yang</td>\n",
       "      <td>Female</td>\n",
       "      <td>158.9</td>\n",
       "      <td>46.0</td>\n",
       "      <td>N</td>\n",
       "      <td>1</td>\n",
       "      <td>2019/10/5</td>\n",
       "      <td>0:04:34</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Peking University</td>\n",
       "      <td>Freshman</td>\n",
       "      <td>Changqiang You</td>\n",
       "      <td>Male</td>\n",
       "      <td>166.5</td>\n",
       "      <td>70.0</td>\n",
       "      <td>N</td>\n",
       "      <td>1</td>\n",
       "      <td>2019/9/4</td>\n",
       "      <td>0:04:20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Shanghai Jiao Tong University</td>\n",
       "      <td>Senior</td>\n",
       "      <td>Mei Sun</td>\n",
       "      <td>Male</td>\n",
       "      <td>188.9</td>\n",
       "      <td>89.0</td>\n",
       "      <td>N</td>\n",
       "      <td>2</td>\n",
       "      <td>2019/9/12</td>\n",
       "      <td>0:05:22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Fudan University</td>\n",
       "      <td>Sophomore</td>\n",
       "      <td>Xiaojuan Sun</td>\n",
       "      <td>Female</td>\n",
       "      <td>NaN</td>\n",
       "      <td>41.0</td>\n",
       "      <td>N</td>\n",
       "      <td>2</td>\n",
       "      <td>2020/1/3</td>\n",
       "      <td>0:04:08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Fudan University</td>\n",
       "      <td>Sophomore</td>\n",
       "      <td>Gaojuan You</td>\n",
       "      <td>Male</td>\n",
       "      <td>174.0</td>\n",
       "      <td>74.0</td>\n",
       "      <td>N</td>\n",
       "      <td>2</td>\n",
       "      <td>2019/11/6</td>\n",
       "      <td>0:05:22</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                          School      Grade            Name  Gender  Height  \\\n",
       "0  Shanghai Jiao Tong University   Freshman    Gaopeng Yang  Female   158.9   \n",
       "1              Peking University   Freshman  Changqiang You    Male   166.5   \n",
       "2  Shanghai Jiao Tong University     Senior         Mei Sun    Male   188.9   \n",
       "3               Fudan University  Sophomore    Xiaojuan Sun  Female     NaN   \n",
       "4               Fudan University  Sophomore     Gaojuan You    Male   174.0   \n",
       "\n",
       "   Weight Transfer  Test_Number  Test_Date Time_Record  \n",
       "0    46.0        N            1  2019/10/5     0:04:34  \n",
       "1    70.0        N            1   2019/9/4     0:04:20  \n",
       "2    89.0        N            2  2019/9/12     0:05:22  \n",
       "3    41.0        N            2   2020/1/3     0:04:08  \n",
       "4    74.0        N            2  2019/11/6     0:05:22  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "89879613",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 200 entries, 0 to 199\n",
      "Data columns (total 10 columns):\n",
      " #   Column       Non-Null Count  Dtype  \n",
      "---  ------       --------------  -----  \n",
      " 0   School       200 non-null    object \n",
      " 1   Grade        200 non-null    object \n",
      " 2   Name         200 non-null    object \n",
      " 3   Gender       200 non-null    object \n",
      " 4   Height       183 non-null    float64\n",
      " 5   Weight       189 non-null    float64\n",
      " 6   Transfer     188 non-null    object \n",
      " 7   Test_Number  200 non-null    int64  \n",
      " 8   Test_Date    200 non-null    object \n",
      " 9   Time_Record  200 non-null    object \n",
      "dtypes: float64(2), int64(1), object(7)\n",
      "memory usage: 15.8+ KB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "40cc3fdd",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "bb2c10dc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Gender\n",
       "Female    159.6\n",
       "Male      173.4\n",
       "Name: Height, dtype: float64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Gender')['Height'].median()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "fbdfa80c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "School                         Gender\n",
       "Fudan University               Female    159.10\n",
       "                               Male      175.95\n",
       "Peking University              Female    159.90\n",
       "                               Male      171.00\n",
       "Shanghai Jiao Tong University  Female    159.15\n",
       "                               Male      176.40\n",
       "Tsinghua University            Female    160.70\n",
       "                               Male      170.40\n",
       "Name: Height, dtype: float64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(['School','Gender'])['Height'].median()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "ee5ade67",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-23-a3d8d702e4dc>:6: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  df_hurun['价值（亿元人民币）'] = df_hurun['价值（亿元人民币）'].astype('int64')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>排名</th>\n",
       "      <th>排名变化</th>\n",
       "      <th>企业名称</th>\n",
       "      <th>价值（亿元人民币）</th>\n",
       "      <th>价值变化（亿元人民币）</th>\n",
       "      <th>国家</th>\n",
       "      <th>城市</th>\n",
       "      <th>行业</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>抖音</td>\n",
       "      <td>13400</td>\n",
       "      <td>-10050</td>\n",
       "      <td>中国</td>\n",
       "      <td>北京</td>\n",
       "      <td>社交媒体</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>SpaceX</td>\n",
       "      <td>8400</td>\n",
       "      <td>1680</td>\n",
       "      <td>美国</td>\n",
       "      <td>洛杉矶</td>\n",
       "      <td>航天</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>-1</td>\n",
       "      <td>蚂蚁集团</td>\n",
       "      <td>8000</td>\n",
       "      <td>-2010</td>\n",
       "      <td>中国</td>\n",
       "      <td>杭州</td>\n",
       "      <td>金融科技</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>Stripe</td>\n",
       "      <td>4100</td>\n",
       "      <td>-2210</td>\n",
       "      <td>美国</td>\n",
       "      <td>旧金山</td>\n",
       "      <td>金融科技</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>11</td>\n",
       "      <td>Shein</td>\n",
       "      <td>4000</td>\n",
       "      <td>2680</td>\n",
       "      <td>中国</td>\n",
       "      <td>广州</td>\n",
       "      <td>电子商务</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>95</td>\n",
       "      <td>-16</td>\n",
       "      <td>Impossible 食品</td>\n",
       "      <td>470</td>\n",
       "      <td>0</td>\n",
       "      <td>美国</td>\n",
       "      <td>雷德伍德城</td>\n",
       "      <td>食品饮料</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98</th>\n",
       "      <td>95</td>\n",
       "      <td>-16</td>\n",
       "      <td>微医</td>\n",
       "      <td>470</td>\n",
       "      <td>0</td>\n",
       "      <td>中国</td>\n",
       "      <td>杭州</td>\n",
       "      <td>健康科技</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99</th>\n",
       "      <td>99</td>\n",
       "      <td>58</td>\n",
       "      <td>蜂巢能源</td>\n",
       "      <td>460</td>\n",
       "      <td>190</td>\n",
       "      <td>中国</td>\n",
       "      <td>常州</td>\n",
       "      <td>新能源</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100</th>\n",
       "      <td>99</td>\n",
       "      <td>-6</td>\n",
       "      <td>Better.com</td>\n",
       "      <td>460</td>\n",
       "      <td>60</td>\n",
       "      <td>美国</td>\n",
       "      <td>纽约</td>\n",
       "      <td>金融科技</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>101</th>\n",
       "      <td>99</td>\n",
       "      <td>-20</td>\n",
       "      <td>Automation Anywhere</td>\n",
       "      <td>460</td>\n",
       "      <td>-10</td>\n",
       "      <td>美国</td>\n",
       "      <td>圣何塞</td>\n",
       "      <td>人工智能</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>101 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     排名 排名变化                 企业名称  价值（亿元人民币） 价值变化（亿元人民币）  国家     城市    行业\n",
       "1     1    0                   抖音      13400      -10050  中国     北京  社交媒体\n",
       "2     2    1               SpaceX       8400        1680  美国    洛杉矶    航天\n",
       "3     3   -1                 蚂蚁集团       8000       -2010  中国     杭州  金融科技\n",
       "4     4    0               Stripe       4100       -2210  美国    旧金山  金融科技\n",
       "5     5   11                Shein       4000        2680  中国     广州  电子商务\n",
       "..   ..  ...                  ...        ...         ...  ..    ...   ...\n",
       "97   95  -16        Impossible 食品        470           0  美国  雷德伍德城  食品饮料\n",
       "98   95  -16                   微医        470           0  中国     杭州  健康科技\n",
       "99   99   58                 蜂巢能源        460         190  中国     常州   新能源\n",
       "100  99   -6           Better.com        460          60  美国     纽约  金融科技\n",
       "101  99  -20  Automation Anywhere        460         -10  美国    圣何塞  人工智能\n",
       "\n",
       "[101 rows x 8 columns]"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hurun_独角兽=pd.read_html('https://www.hurun.net/zh-CN/Info/Detail?num=L9SQPH9FKJB1')[-3]\n",
    "hurun_独角兽[0:1].values.tolist()[0]\n",
    "df_hurun = hurun_独角兽[1:]\n",
    "df_hurun\n",
    "df_hurun.columns = hurun_独角兽[0:1].values.tolist()[0]\n",
    "df_hurun['价值（亿元人民币）'] = df_hurun['价值（亿元人民币）'].astype('int64')\n",
    "df_hurun"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ca110698",
   "metadata": {},
   "source": [
    "* 不同行业类别国家的估值情况对比"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "7d28b760",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>价值（亿元人民币）</th>\n",
       "      <th>行业</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>国家</th>\n",
       "      <th>行业</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"17\" valign=\"top\">中国</th>\n",
       "      <th>人工智能</th>\n",
       "      <td>570</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>企业服务</th>\n",
       "      <td>515</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>保险</th>\n",
       "      <td>740</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>健康科技</th>\n",
       "      <td>1510</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>共享经济</th>\n",
       "      <td>965</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>大数据</th>\n",
       "      <td>535</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>数字科技</th>\n",
       "      <td>2000</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>新能源</th>\n",
       "      <td>1770</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>新能源汽车</th>\n",
       "      <td>600</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>新零售</th>\n",
       "      <td>670</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>机器人</th>\n",
       "      <td>1200</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>物流</th>\n",
       "      <td>3170</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>电子商务</th>\n",
       "      <td>5340</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>社交媒体</th>\n",
       "      <td>13400</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>软件服务</th>\n",
       "      <td>1870</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>金融科技</th>\n",
       "      <td>10200</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>食品饮料</th>\n",
       "      <td>1000</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>以色列</th>\n",
       "      <th>区块链</th>\n",
       "      <td>535</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">印度</th>\n",
       "      <th>共享经济</th>\n",
       "      <td>480</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>快递</th>\n",
       "      <td>720</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>教育科技</th>\n",
       "      <td>1500</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>游戏</th>\n",
       "      <td>535</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">印度尼西亚</th>\n",
       "      <th>共享经济</th>\n",
       "      <td>700</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>电子商务</th>\n",
       "      <td>1300</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>土耳其</th>\n",
       "      <th>快递</th>\n",
       "      <td>800</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>墨西哥</th>\n",
       "      <th>电子商务</th>\n",
       "      <td>580</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>巴哈马</th>\n",
       "      <th>区块链</th>\n",
       "      <td>1300</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>德国</th>\n",
       "      <th>软件服务</th>\n",
       "      <td>555</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>澳大利亚</th>\n",
       "      <th>软件服务</th>\n",
       "      <td>1750</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">瑞典</th>\n",
       "      <th>新能源</th>\n",
       "      <td>800</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>新能源汽车</th>\n",
       "      <td>1300</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>瑞士</th>\n",
       "      <th>区块链</th>\n",
       "      <td>575</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"19\" valign=\"top\">美国</th>\n",
       "      <th>人工智能</th>\n",
       "      <td>2990</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>企业服务</th>\n",
       "      <td>1170</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>健康科技</th>\n",
       "      <td>1310</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>共享经济</th>\n",
       "      <td>1000</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>分析</th>\n",
       "      <td>575</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>区块链</th>\n",
       "      <td>1970</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>大数据</th>\n",
       "      <td>2500</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>快递</th>\n",
       "      <td>2320</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>机器人</th>\n",
       "      <td>575</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>游戏</th>\n",
       "      <td>600</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>物流</th>\n",
       "      <td>1735</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>生物科技</th>\n",
       "      <td>1340</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>电子商务</th>\n",
       "      <td>1330</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>社交媒体</th>\n",
       "      <td>1000</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>网络安全</th>\n",
       "      <td>1690</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>航天</th>\n",
       "      <td>8400</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>软件服务</th>\n",
       "      <td>4430</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>金融科技</th>\n",
       "      <td>12335</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>食品饮料</th>\n",
       "      <td>470</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">英国</th>\n",
       "      <th>区块链</th>\n",
       "      <td>700</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>软件服务</th>\n",
       "      <td>1090</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>金融科技</th>\n",
       "      <td>4785</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>越南</th>\n",
       "      <th>消费品</th>\n",
       "      <td>550</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">韩国</th>\n",
       "      <th>区块链</th>\n",
       "      <td>535</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>电子商务</th>\n",
       "      <td>560</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>马耳他</th>\n",
       "      <th>区块链</th>\n",
       "      <td>3000</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             价值（亿元人民币）  行业\n",
       "国家    行业                  \n",
       "中国    人工智能         570   1\n",
       "      企业服务         515   1\n",
       "      保险           740   1\n",
       "      健康科技        1510   2\n",
       "      共享经济         965   1\n",
       "      大数据          535   1\n",
       "      数字科技        2000   1\n",
       "      新能源         1770   3\n",
       "      新能源汽车        600   1\n",
       "      新零售          670   1\n",
       "      机器人         1200   1\n",
       "      物流          3170   3\n",
       "      电子商务        5340   3\n",
       "      社交媒体       13400   1\n",
       "      软件服务        1870   2\n",
       "      金融科技       10200   2\n",
       "      食品饮料        1000   1\n",
       "以色列   区块链          535   1\n",
       "印度    共享经济         480   1\n",
       "      快递           720   1\n",
       "      教育科技        1500   1\n",
       "      游戏           535   1\n",
       "印度尼西亚 共享经济         700   1\n",
       "      电子商务        1300   1\n",
       "土耳其   快递           800   1\n",
       "墨西哥   电子商务         580   1\n",
       "巴哈马   区块链         1300   1\n",
       "德国    软件服务         555   1\n",
       "澳大利亚  软件服务        1750   1\n",
       "瑞典    新能源          800   1\n",
       "      新能源汽车       1300   1\n",
       "瑞士    区块链          575   1\n",
       "美国    人工智能        2990   5\n",
       "      企业服务        1170   1\n",
       "      健康科技        1310   2\n",
       "      共享经济        1000   1\n",
       "      分析           575   1\n",
       "      区块链         1970   3\n",
       "      大数据         2500   1\n",
       "      快递          2320   2\n",
       "      机器人          575   1\n",
       "      游戏           600   1\n",
       "      物流          1735   2\n",
       "      生物科技        1340   2\n",
       "      电子商务        1330   2\n",
       "      社交媒体        1000   1\n",
       "      网络安全        1690   3\n",
       "      航天          8400   1\n",
       "      软件服务        4430   8\n",
       "      金融科技       12335  11\n",
       "      食品饮料         470   1\n",
       "英国    区块链          700   1\n",
       "      软件服务        1090   2\n",
       "      金融科技        4785   4\n",
       "越南    消费品          550   1\n",
       "韩国    区块链          535   1\n",
       "      电子商务         560   1\n",
       "马耳他   区块链         3000   1"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "国家行业 = df_hurun.groupby(['国家','行业']).agg({'价值（亿元人民币）':sum,'行业':'count'})\n",
    "国家行业"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4b2cc56f",
   "metadata": {},
   "source": [
    "* 不同国家行业独角兽的对比"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "caf841cf",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>价值（亿元人民币）</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>国家</th>\n",
       "      <th>行业</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"17\" valign=\"top\">中国</th>\n",
       "      <th>人工智能</th>\n",
       "      <td>570</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>企业服务</th>\n",
       "      <td>515</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>保险</th>\n",
       "      <td>740</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>健康科技</th>\n",
       "      <td>1510</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>共享经济</th>\n",
       "      <td>965</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>大数据</th>\n",
       "      <td>535</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>数字科技</th>\n",
       "      <td>2000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>新能源</th>\n",
       "      <td>1770</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>新能源汽车</th>\n",
       "      <td>600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>新零售</th>\n",
       "      <td>670</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>机器人</th>\n",
       "      <td>1200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>物流</th>\n",
       "      <td>3170</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>电子商务</th>\n",
       "      <td>5340</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>社交媒体</th>\n",
       "      <td>13400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>软件服务</th>\n",
       "      <td>1870</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>金融科技</th>\n",
       "      <td>10200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>食品饮料</th>\n",
       "      <td>1000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>以色列</th>\n",
       "      <th>区块链</th>\n",
       "      <td>535</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">印度</th>\n",
       "      <th>共享经济</th>\n",
       "      <td>480</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>快递</th>\n",
       "      <td>720</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>教育科技</th>\n",
       "      <td>1500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>游戏</th>\n",
       "      <td>535</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">印度尼西亚</th>\n",
       "      <th>共享经济</th>\n",
       "      <td>700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>电子商务</th>\n",
       "      <td>1300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>土耳其</th>\n",
       "      <th>快递</th>\n",
       "      <td>800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>墨西哥</th>\n",
       "      <th>电子商务</th>\n",
       "      <td>580</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>巴哈马</th>\n",
       "      <th>区块链</th>\n",
       "      <td>1300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>德国</th>\n",
       "      <th>软件服务</th>\n",
       "      <td>555</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>澳大利亚</th>\n",
       "      <th>软件服务</th>\n",
       "      <td>1750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">瑞典</th>\n",
       "      <th>新能源</th>\n",
       "      <td>800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>新能源汽车</th>\n",
       "      <td>1300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>瑞士</th>\n",
       "      <th>区块链</th>\n",
       "      <td>575</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"19\" valign=\"top\">美国</th>\n",
       "      <th>人工智能</th>\n",
       "      <td>2990</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>企业服务</th>\n",
       "      <td>1170</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>健康科技</th>\n",
       "      <td>1310</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>共享经济</th>\n",
       "      <td>1000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>分析</th>\n",
       "      <td>575</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>区块链</th>\n",
       "      <td>1970</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>大数据</th>\n",
       "      <td>2500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>快递</th>\n",
       "      <td>2320</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>机器人</th>\n",
       "      <td>575</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>游戏</th>\n",
       "      <td>600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>物流</th>\n",
       "      <td>1735</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>生物科技</th>\n",
       "      <td>1340</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>电子商务</th>\n",
       "      <td>1330</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>社交媒体</th>\n",
       "      <td>1000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>网络安全</th>\n",
       "      <td>1690</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>航天</th>\n",
       "      <td>8400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>软件服务</th>\n",
       "      <td>4430</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>金融科技</th>\n",
       "      <td>12335</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>食品饮料</th>\n",
       "      <td>470</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">英国</th>\n",
       "      <th>区块链</th>\n",
       "      <td>700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>软件服务</th>\n",
       "      <td>1090</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>金融科技</th>\n",
       "      <td>4785</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>越南</th>\n",
       "      <th>消费品</th>\n",
       "      <td>550</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">韩国</th>\n",
       "      <th>区块链</th>\n",
       "      <td>535</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>电子商务</th>\n",
       "      <td>560</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>马耳他</th>\n",
       "      <th>区块链</th>\n",
       "      <td>3000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             价值（亿元人民币）\n",
       "国家    行业              \n",
       "中国    人工智能         570\n",
       "      企业服务         515\n",
       "      保险           740\n",
       "      健康科技        1510\n",
       "      共享经济         965\n",
       "      大数据          535\n",
       "      数字科技        2000\n",
       "      新能源         1770\n",
       "      新能源汽车        600\n",
       "      新零售          670\n",
       "      机器人         1200\n",
       "      物流          3170\n",
       "      电子商务        5340\n",
       "      社交媒体       13400\n",
       "      软件服务        1870\n",
       "      金融科技       10200\n",
       "      食品饮料        1000\n",
       "以色列   区块链          535\n",
       "印度    共享经济         480\n",
       "      快递           720\n",
       "      教育科技        1500\n",
       "      游戏           535\n",
       "印度尼西亚 共享经济         700\n",
       "      电子商务        1300\n",
       "土耳其   快递           800\n",
       "墨西哥   电子商务         580\n",
       "巴哈马   区块链         1300\n",
       "德国    软件服务         555\n",
       "澳大利亚  软件服务        1750\n",
       "瑞典    新能源          800\n",
       "      新能源汽车       1300\n",
       "瑞士    区块链          575\n",
       "美国    人工智能        2990\n",
       "      企业服务        1170\n",
       "      健康科技        1310\n",
       "      共享经济        1000\n",
       "      分析           575\n",
       "      区块链         1970\n",
       "      大数据         2500\n",
       "      快递          2320\n",
       "      机器人          575\n",
       "      游戏           600\n",
       "      物流          1735\n",
       "      生物科技        1340\n",
       "      电子商务        1330\n",
       "      社交媒体        1000\n",
       "      网络安全        1690\n",
       "      航天          8400\n",
       "      软件服务        4430\n",
       "      金融科技       12335\n",
       "      食品饮料         470\n",
       "英国    区块链          700\n",
       "      软件服务        1090\n",
       "      金融科技        4785\n",
       "越南    消费品          550\n",
       "韩国    区块链          535\n",
       "      电子商务         560\n",
       "马耳他   区块链         3000"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "行业国家 = df_hurun.groupby(['国家','行业']).agg({'价值（亿元人民币）':sum})\n",
    "行业国家"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6c23da4a",
   "metadata": {},
   "source": [
    "## Groupby对象"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "04263b0f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001FF1B1B0550>"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gb = df_hurun.groupby(['行业','国家'])\n",
    "gb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "id": "4b847d98",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "58"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#分组\n",
    "gb.ngroups"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "586c208e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys([('人工智能', '中国'), ('人工智能', '美国'), ('企业服务', '中国'), ('企业服务', '美国'), ('保险', '中国'), ('健康科技', '中国'), ('健康科技', '美国'), ('共享经济', '中国'), ('共享经济', '印度'), ('共享经济', '印度尼西亚'), ('共享经济', '美国'), ('分析', '美国'), ('区块链', '以色列'), ('区块链', '巴哈马'), ('区块链', '瑞士'), ('区块链', '美国'), ('区块链', '英国'), ('区块链', '韩国'), ('区块链', '马耳他'), ('大数据', '中国'), ('大数据', '美国'), ('快递', '印度'), ('快递', '土耳其'), ('快递', '美国'), ('教育科技', '印度'), ('数字科技', '中国'), ('新能源', '中国'), ('新能源', '瑞典'), ('新能源汽车', '中国'), ('新能源汽车', '瑞典'), ('新零售', '中国'), ('机器人', '中国'), ('机器人', '美国'), ('消费品', '越南'), ('游戏', '印度'), ('游戏', '美国'), ('物流', '中国'), ('物流', '美国'), ('生物科技', '美国'), ('电子商务', '中国'), ('电子商务', '印度尼西亚'), ('电子商务', '墨西哥'), ('电子商务', '美国'), ('电子商务', '韩国'), ('社交媒体', '中国'), ('社交媒体', '美国'), ('网络安全', '美国'), ('航天', '美国'), ('软件服务', '中国'), ('软件服务', '德国'), ('软件服务', '澳大利亚'), ('软件服务', '美国'), ('软件服务', '英国'), ('金融科技', '中国'), ('金融科技', '美国'), ('金融科技', '英国'), ('食品饮料', '中国'), ('食品饮料', '美国')])"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "res = gb.groups\n",
    "res.keys()# 查看分组情况"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "id": "627b2ea8",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "行业     国家   \n",
       "人工智能   中国        1\n",
       "       美国        5\n",
       "企业服务   中国        1\n",
       "       美国        1\n",
       "保险     中国        1\n",
       "健康科技   中国        2\n",
       "       美国        2\n",
       "共享经济   中国        1\n",
       "       印度        1\n",
       "       印度尼西亚     1\n",
       "       美国        1\n",
       "分析     美国        1\n",
       "区块链    以色列       1\n",
       "       巴哈马       1\n",
       "       瑞士        1\n",
       "       美国        3\n",
       "       英国        1\n",
       "       韩国        1\n",
       "       马耳他       1\n",
       "大数据    中国        1\n",
       "       美国        1\n",
       "快递     印度        1\n",
       "       土耳其       1\n",
       "       美国        2\n",
       "教育科技   印度        1\n",
       "数字科技   中国        1\n",
       "新能源    中国        3\n",
       "       瑞典        1\n",
       "新能源汽车  中国        1\n",
       "       瑞典        1\n",
       "新零售    中国        1\n",
       "机器人    中国        1\n",
       "       美国        1\n",
       "消费品    越南        1\n",
       "游戏     印度        1\n",
       "       美国        1\n",
       "物流     中国        3\n",
       "       美国        2\n",
       "生物科技   美国        2\n",
       "电子商务   中国        3\n",
       "       印度尼西亚     1\n",
       "       墨西哥       1\n",
       "       美国        2\n",
       "       韩国        1\n",
       "社交媒体   中国        1\n",
       "       美国        1\n",
       "网络安全   美国        3\n",
       "航天     美国        1\n",
       "软件服务   中国        2\n",
       "       德国        1\n",
       "       澳大利亚      1\n",
       "       美国        8\n",
       "       英国        2\n",
       "金融科技   中国        2\n",
       "       美国       11\n",
       "       英国        4\n",
       "食品饮料   中国        1\n",
       "       美国        1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gb.size()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bca9b9de",
   "metadata": {},
   "source": [
    "##  聚合函数！\n",
    "agg方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "id": "7b2647ff",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>价值（亿元人民币）</th>\n",
       "      <th>行业</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>行业</th>\n",
       "      <th>城市</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">人工智能</th>\n",
       "      <th>哈里斯堡</th>\n",
       "      <td>500</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>圣何塞</th>\n",
       "      <td>460</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>广州</th>\n",
       "      <td>570</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>旧金山</th>\n",
       "      <td>2030</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>企业服务</th>\n",
       "      <th>宿迁</th>\n",
       "      <td>515</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">金融科技</th>\n",
       "      <th>深圳</th>\n",
       "      <td>2200</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>纽约</th>\n",
       "      <td>1000</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>芝加哥</th>\n",
       "      <td>1500</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">食品饮料</th>\n",
       "      <th>北京</th>\n",
       "      <td>1000</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>雷德伍德城</th>\n",
       "      <td>470</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>82 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            价值（亿元人民币）  行业\n",
       "行业   城市                  \n",
       "人工智能 哈里斯堡         500   1\n",
       "     圣何塞          460   1\n",
       "     广州           570   1\n",
       "     旧金山         2030   3\n",
       "企业服务 宿迁           515   1\n",
       "...               ...  ..\n",
       "金融科技 深圳          2200   1\n",
       "     纽约          1000   2\n",
       "     芝加哥         1500   1\n",
       "食品饮料 北京          1000   1\n",
       "     雷德伍德城        470   1\n",
       "\n",
       "[82 rows x 2 columns]"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "行业城市 = df_hurun.groupby(['行业','城市']).agg({'价值（亿元人民币）':sum,'行业':'count'})\n",
    "行业城市"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ab7e7473",
   "metadata": {},
   "source": [
    "* 聚合函数的重命名"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "id": "e8a3311f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>价值_亿</th>\n",
       "      <th>企业数量</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>行业</th>\n",
       "      <th>城市</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">人工智能</th>\n",
       "      <th>哈里斯堡</th>\n",
       "      <td>500</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>圣何塞</th>\n",
       "      <td>460</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>广州</th>\n",
       "      <td>570</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>旧金山</th>\n",
       "      <td>2030</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>企业服务</th>\n",
       "      <th>宿迁</th>\n",
       "      <td>515</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">金融科技</th>\n",
       "      <th>深圳</th>\n",
       "      <td>2200</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>纽约</th>\n",
       "      <td>1000</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>芝加哥</th>\n",
       "      <td>1500</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">食品饮料</th>\n",
       "      <th>北京</th>\n",
       "      <td>1000</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>雷德伍德城</th>\n",
       "      <td>470</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>82 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            价值_亿  企业数量\n",
       "行业   城市               \n",
       "人工智能 哈里斯堡    500     1\n",
       "     圣何塞     460     1\n",
       "     广州      570     1\n",
       "     旧金山    2030     3\n",
       "企业服务 宿迁      515     1\n",
       "...          ...   ...\n",
       "金融科技 深圳     2200     1\n",
       "     纽约     1000     2\n",
       "     芝加哥    1500     1\n",
       "食品饮料 北京     1000     1\n",
       "     雷德伍德城   470     1\n",
       "\n",
       "[82 rows x 2 columns]"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "行业城市_重命名 = df_hurun.groupby(['行业','城市']).agg( 价值_亿 = ('价值（亿元人民币）','sum'),企业数量 = ('行业',\"count\"))\n",
    "行业城市_重命名"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "031c9d8e",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "c06df258",
   "metadata": {},
   "source": [
    "## 正确胡输出数据分析结果"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "731efec2",
   "metadata": {},
   "outputs": [],
   "source": [
    "with pd.ExcelWriter('output_hurun_analysis_results.xlsx')as writer:\n",
    "    国家行业.to_excel(writer,sheet_name = '国家行业')\n",
    "    行业国家.to_excel(writer,sheet_name = '行业国家')\n",
    "    行业城市.to_excel(writer,sheet_name = '行业城市')\n",
    "    行业城市_重命名.to_excel(writer,sheet_name = '行业城市_重命名')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4c7af1e3",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ddb6690c",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
